Evaluation of RISK survival models
This document highlights the use of
RRPlot(),
CoxRiskCalibration(), and
CalibrationProbPoissonRisk,
for the evaluation (RRPlot), and calibration of cox models
(CoxRiskCalibration) or logistic models (CalibrationProbPoissonRisk) of
survival data.
Furthermore, it can be used to evaluate any Risk index that reruns
the probability of a future event on external data-set.
This document will use the survival::rotterdam, and survival::gbsg
data-sets to train and predict the risk of cancer recurrence after
surgery. Both Cox and Logistic models will be trained and evaluated.
Here are some sample plots returned by the evaluated functions:




The libraries
library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
source("~/GitHub/FRESA.CAD/R/RRPlot.R")
source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
Breast Cancer Royston-Altman data
survival::rotterdam conditioning
gbsgdata <- gbsg
rownames(gbsgdata) <- gbsgdata$pid
gbsgdata$pid <- NULL
odata <-rotterdam
rownames(odata) <- odata$pid
odata$pid <- NULL
odata$rfstime <- odata$rtime
odata$status <- odata$recur
odata$rtime <- NULL
odata$recur <- NULL
odata <- odata[,colnames(odata) %in% colnames(gbsgdata)]
odata$size <- 10*(odata$size=="<=20") +
35*(odata$size=="20-50") +
60*(odata$size==">50")
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,odata))
data$`(Intercept)` <- NULL
dataBrestCancerTrain <- cbind(time=odata[rownames(data),"rfstime"],status=odata[rownames(data),"status"],data)
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),":","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain)," ","")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"\\.","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),"-","_")
colnames(dataBrestCancerTrain) <-str_replace_all(colnames(dataBrestCancerTrain),">","_")
dataBrestCancerTrain$time <- dataBrestCancerTrain$time/365 ## To years
pander::pander(table(odata[rownames(data),"status"]),caption="rotterdam")
The survival::gbsg data conditioning
gbsgdata <- gbsgdata[,colnames(odata)]
data <- as.data.frame(model.matrix(Surv(rfstime,status)~.*.,gbsgdata))
data$`(Intercept)` <- NULL
dataBrestCancerTest <- cbind(time=gbsgdata[rownames(data),"rfstime"],status=gbsgdata[rownames(data),"status"],data)
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),":","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest)," ","")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"\\.","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),"-","_")
colnames(dataBrestCancerTest) <-str_replace_all(colnames(dataBrestCancerTest),">","_")
dataBrestCancerTest$time <- dataBrestCancerTest$time/365
pander::pander(table(odata[rownames(data),"status"]), caption="gbsg")
Cox Modeling
ml <- BSWiMS.model(Surv(time,status)~.,data=dataBrestCancerTrain,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
| size_nodes |
-8.55e-04 |
-0.001403 |
-8.45e-04 |
-0.000231 |
0.624 |
0.644 |
0.646 |
0.629 |
0.645 |
0.647 |
3.27e-03 |
0.33343 |
7.01 |
9.3054 |
-0.00172 |
1 |
| nodes |
1.88e-01 |
0.082234 |
1.85e-01 |
0.294899 |
0.637 |
0.648 |
0.646 |
0.640 |
0.649 |
0.647 |
3.84e-03 |
0.13951 |
5.59 |
3.8202 |
0.00260 |
1 |
| grade |
5.39e-01 |
0.311955 |
5.38e-01 |
0.792299 |
0.565 |
0.645 |
0.646 |
0.561 |
0.646 |
0.647 |
5.54e-03 |
0.11567 |
4.97 |
3.6766 |
-0.00104 |
1 |
| age |
-7.42e-03 |
-0.012659 |
-7.12e-03 |
-0.002620 |
0.513 |
0.629 |
0.646 |
0.513 |
0.629 |
0.647 |
3.48e-03 |
0.09715 |
4.90 |
2.6559 |
-0.01764 |
1 |
| grade_nodes |
-2.91e-02 |
-0.055106 |
-2.89e-02 |
-0.001119 |
0.635 |
0.648 |
0.646 |
0.639 |
0.648 |
0.647 |
1.29e-03 |
-0.08062 |
3.62 |
-2.2618 |
0.00182 |
1 |
| size |
2.41e-02 |
0.000207 |
2.42e-02 |
0.047018 |
0.595 |
0.642 |
0.646 |
0.595 |
0.643 |
0.647 |
1.60e-03 |
0.00156 |
3.41 |
0.0425 |
-0.00392 |
1 |
| size_grade |
-2.92e-03 |
-0.011304 |
-2.97e-03 |
0.005226 |
0.598 |
0.643 |
0.646 |
0.599 |
0.644 |
0.647 |
3.72e-04 |
-0.06105 |
2.33 |
-1.6745 |
-0.00304 |
1 |
| age_nodes |
5.58e-05 |
-0.000656 |
1.15e-05 |
0.001068 |
0.626 |
0.645 |
0.646 |
0.630 |
0.645 |
0.647 |
5.06e-05 |
0.06168 |
2.10 |
1.6845 |
-0.00140 |
1 |
Cox Model Performance
Here we evaluate the model using the RRPlot() function.
The evaluation of the raw Cox model with RRPlot()
Here we will use the predicted event probability assuming a baseline
hazard for events withing 5 years
index <- predict(ml,dataBrestCancerTrain)
timeinterval <- 5 # Five years
h0 <- sum(dataBrestCancerTrain$status & dataBrestCancerTrain$time < timeinterval)
h0 <- h0/nrow(subset(dataBrestCancerTrain,time > timeinterval | status==1))
rdata <- cbind(dataBrestCancerTrain$status,ppoisGzero(index,h0))
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90,0.80),
timetoEvent=dataBrestCancerTrain$time,
title="Raw Train: Breast Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)


ROC:
Raw Train: Breast Cancer



As we can see the Observed probability as well as the Time vs. Events
are not calibrated.
Analyzing RRPlot() outputs
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
| 0.695 |
0.677 |
0.714 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
| 0.304 |
0.281 |
0.328 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
| 0.9 |
0.883 |
0.915 |
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
| 0.677 |
0.354 |
0.0141 |
2982 |
0 |
1518 |
6184528 |
4188057 |
2703838 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
| 0.474 |
0.397 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
pander::pander(rrAnalysisTrain$sufdif,caption="Logrank test")
Logrank test Chisq = 474.669311 on 2 degrees of freedom, p =
0.000000
| class=0 |
1983 |
812 |
1143 |
96.0 |
393.7 |
| class=1 |
390 |
244 |
175 |
27.1 |
30.7 |
| class=2 |
609 |
462 |
200 |
344.4 |
401.7 |
Cox Calibration
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,dataBrestCancerTrain,"status","time")
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
Analyzing RRPlot() outputs of the calibrated model
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
| 0.992 |
0.942 |
1.04 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
| 0.695 |
0.677 |
0.714 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
| 0.304 |
0.281 |
0.328 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
| 0.9 |
0.883 |
0.915 |
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
| 0.677 |
0.354 |
0.0141 |
2982 |
0 |
1518 |
6184528 |
4188057 |
2703838 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
| 0.646 |
0.559 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
pander::pander(rrAnalysisTrain$sufdif,caption="Logrank test")
Logrank test Chisq = 474.669311 on 2 degrees of freedom, p =
0.000000
| class=0 |
1983 |
812 |
1143 |
96.0 |
393.7 |
| class=1 |
390 |
244 |
175 |
27.1 |
30.7 |
| class=2 |
609 |
462 |
200 |
344.4 |
401.7 |
Performance on the external data set
index <- predict(ml,dataBrestCancerTest)
pp <- predictionStats_binary(cbind(dataBrestCancerTest$status,index),plotname="Breast Cancer")
Breast Cancer

par(op)
prob <- ppoisGzero(index,h0)
rdata <- cbind(dataBrestCancerTest$status,prob)
rrAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
timetoEvent=dataBrestCancerTest$time,
title="Test: Breast Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)


ROC:
Test: Breast Cancer



par(op)
The Evaluation results of validation data set
pander::pander(t(rrAnalysis$OERatio),caption="O/E Ratio")
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
| 0.371 |
0.316 |
0.429 |
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
| 0.84 |
0.799 |
0.875 |
pander::pander(rrAnalysis$c.index,caption="C. Index")
| 0.663 |
0.326 |
0.0311 |
686 |
0 |
299 |
266144 |
176497 |
203702 |
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
| 0.646 |
0.559 |
pander::pander(t(rrAnalysis$RR_atP),caption="Risk Ratio")
Risk Ratio
| 1.76 |
1.51 |
2.07 |
pander::pander(rrAnalysis$sufdif,caption="Logrank test")
Logrank test Chisq = 74.963210 on 2 degrees of freedom, p =
0.000000
| class=0 |
433 |
152 |
210.6 |
16.315 |
55.983 |
| class=1 |
80 |
36 |
33.4 |
0.203 |
0.229 |
| class=2 |
173 |
111 |
55.0 |
57.067 |
71.183 |
Calibrating the index on the test data
calprob <- CoxRiskCalibration(ml,dataBrestCancerTest,"status","time")
rdata <- cbind(dataBrestCancerTest$status,calprob$prob)
rrAnalysis <- RRPlot(rdata,atProb=c(0.90,0.80),
timetoEvent=dataBrestCancerTest$time,
title="Recalibrated Test: Breast Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=calprob$timeInterval)


ROC:
Recalibrated Test: Breast Cancer



The Evaluation results of calibrated model on test data set
pander::pander(t(rrAnalysis$OERatio),caption="O/E Ratio")
O/E Ratio
| 1.01 |
0.901 |
1.13 |
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
| 0.284 |
0.234 |
0.339 |
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
| 0.899 |
0.865 |
0.927 |
pander::pander(rrAnalysis$c.index,caption="C. Index")
| 0.663 |
0.326 |
0.0311 |
686 |
0 |
299 |
266144 |
176497 |
203702 |
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
| 0.61 |
0.502 |
pander::pander(t(rrAnalysis$RR_atP),caption="Risk Ratio")
pander::pander(rrAnalysis$sufdif,caption="Logrank test")
Logrank test Chisq = 80.872386 on 2 degrees of freedom, p =
0.000000
| class=0 |
486 |
177 |
234.2 |
13.95 |
65.72 |
| class=1 |
76 |
37 |
27.9 |
2.98 |
3.31 |
| class=2 |
124 |
85 |
37.0 |
62.43 |
72.18 |
Logistic Model
Here we train a logistic model on the same data set
## Only label subjects that present event withing five years
dataBrestCancerR <- subset(dataBrestCancerTrain, time>=5 | status==1)
dataBrestCancerR$status <- dataBrestCancerR$status * (dataBrestCancerR$time < 5)
dataBrestCancerR$time <- NULL
ml <- BSWiMS.model(status~1,data=dataBrestCancerR,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
| grade_nodes |
0.061410 |
0.049466 |
0.06285 |
0.078896 |
0.682 |
0.637 |
0.686 |
0.649 |
0.624 |
0.655 |
0.06580 |
0.549 |
13.66 |
15.65 |
-0.03109 |
1 |
| size_grade |
0.007168 |
0.005401 |
0.00746 |
0.009073 |
0.632 |
0.682 |
0.686 |
0.626 |
0.646 |
0.655 |
0.01787 |
0.294 |
6.74 |
7.73 |
-0.00865 |
1 |
| nodes_hormon |
-0.056681 |
-0.108718 |
-0.06026 |
-0.012566 |
0.587 |
0.688 |
0.686 |
0.526 |
0.658 |
0.655 |
0.00280 |
0.455 |
3.44 |
12.15 |
0.00285 |
1 |
| pgr |
-0.000438 |
-0.000787 |
-0.00044 |
-0.000134 |
0.571 |
0.689 |
0.686 |
0.500 |
0.659 |
0.655 |
0.00257 |
0.198 |
2.64 |
5.75 |
0.00412 |
1 |
Logistic Model Performance
op <- par(no.readonly = TRUE)
## No-calibrated
rdata <- cbind(dataBrestCancerTrain$status,predict(ml,dataBrestCancerTrain))
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90,0.80),
timetoEvent=dataBrestCancerTrain$time,
title="Logistic Train: Breast Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=5.0)


ROC:
Logistic Train: Breast Cancer



par(op)
The Evaluaiton of the training
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
| 0.965 |
0.917 |
1.01 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
| 0.687 |
0.668 |
0.706 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
| 0.323 |
0.3 |
0.348 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
| 0.9 |
0.883 |
0.915 |
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
| 0.676 |
0.353 |
0.0141 |
2982 |
0 |
1518 |
6184528 |
4183317 |
2703838 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
| 0.551 |
0.438 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
| 1.76 |
1.65 |
1.87 |
pander::pander(rrAnalysisTrain$sufdif,caption="Logrank test")
Logrank test Chisq = 544.494088 on 2 degrees of freedom, p =
0.000000
| class=0 |
1971 |
800 |
1145 |
103.8 |
428.3 |
| class=1 |
373 |
227 |
171 |
18.1 |
20.5 |
| class=2 |
638 |
491 |
202 |
413.1 |
483.7 |
The evaluation of the validation
pander::pander(t(rrAnalysis$OERatio),caption="O/E Ratio")
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
| 0.677 |
0.638 |
0.717 |
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
| 0.331 |
0.278 |
0.388 |
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
| 0.884 |
0.848 |
0.914 |
pander::pander(rrAnalysis$c.index,caption="C. Index")
| 0.681 |
0.361 |
0.0304 |
686 |
0 |
299 |
266144 |
181160 |
203702 |
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
| 0.551 |
0.438 |
pander::pander(t(rrAnalysis$RR_atP),caption="Risk Ratio")
pander::pander(rrAnalysis$sufdif,caption="Logrank test")
Logrank test Chisq = 101.719564 on 2 degrees of freedom, p =
0.000000
| class=0 |
427 |
147 |
211.4 |
19.6 |
68.2 |
| class=1 |
115 |
53 |
45.9 |
1.1 |
1.3 |
| class=2 |
144 |
99 |
41.7 |
78.9 |
93.3 |
Logistic Model Poisson Calibration
riskdata <- cbind(dataBrestCancerTrain$status,predict(ml,dataBrestCancerTrain),dataBrestCancerTrain$time)
calprob <- CalibrationProbPoissonRisk(riskdata)
timeinterval <- calprob$timeInterval;
gain <- calprob$hazardGain
rdata <- cbind(dataBrestCancerTrain$status,calprob$prob)
rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90,0.80),
timetoEvent=dataBrestCancerTrain$time,
title="Adj Logistic Train: Breast Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)


ROC:
Adj Logistic Train: Breast Cancer



par(op)
The Evaluaiton of the calibrated logistic: training
pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
| 0.994 |
0.945 |
1.05 |
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
| 0.687 |
0.668 |
0.706 |
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
| 0.323 |
0.3 |
0.348 |
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
| 0.9 |
0.883 |
0.915 |
pander::pander(rrAnalysisTrain$c.index,caption="C. Index")
| 0.676 |
0.353 |
0.0141 |
2982 |
0 |
1518 |
6184528 |
4183317 |
2703838 |
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
| 0.649 |
0.529 |
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
| 1.76 |
1.65 |
1.87 |
pander::pander(rrAnalysisTrain$sufdif,caption="Logrank test")
Logrank test Chisq = 544.494088 on 2 degrees of freedom, p =
0.000000
| class=0 |
1971 |
800 |
1145 |
103.8 |
428.3 |
| class=1 |
373 |
227 |
171 |
18.1 |
20.5 |
| class=2 |
638 |
491 |
202 |
413.1 |
483.7 |
probLog <- predict(ml,dataBrestCancerTest)
aprob <- adjustProb(probLog,gain)
rdata <- cbind(dataBrestCancerTest$status,aprob)
rrAnalysis <- RRPlot(rdata,atThr=rrAnalysisTrain$thr_atP,
timetoEvent=dataBrestCancerTest$time,
title="Adj Logistic Test: Breast Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)


ROC:
Adj Logistic Test: Breast Cancer



The evaluation of the calibrated validation
pander::pander(t(rrAnalysis$OERatio),caption="O/E Ratio")
pander::pander(t(rrAnalysis$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
| 0.677 |
0.638 |
0.717 |
pander::pander((rrAnalysis$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
| 0.331 |
0.278 |
0.388 |
pander::pander((rrAnalysis$ROCAnalysis$specificity),caption="Specificity")
Specificity
| 0.884 |
0.848 |
0.914 |
pander::pander(rrAnalysis$c.index,caption="C. Index")
| 0.681 |
0.361 |
0.0304 |
686 |
0 |
299 |
266144 |
181160 |
203702 |
pander::pander(t(rrAnalysis$thr_atP),caption="Probability Thresholds")
Probability Thresholds
| 0.649 |
0.529 |
pander::pander(t(rrAnalysis$RR_atP),caption="Risk Ratio")
pander::pander(rrAnalysis$sufdif,caption="Logrank test")
Logrank test Chisq = 101.719564 on 2 degrees of freedom, p =
0.000000
| class=0 |
427 |
147 |
211.4 |
19.6 |
68.2 |
| class=1 |
115 |
53 |
45.9 |
1.1 |
1.3 |
| class=2 |
144 |
99 |
41.7 |
78.9 |
93.3 |